Find data from TCGA and report normalized expression of PD-1 and PD-L1 (aka PDCD1 and CD274).
36 cohorts were found; their median expression values are shown below. The cohorts with the highest co-expression are THYM and DLBC. Also of potential interest is the KICH cohort, which is anti-correlated: it has very high expression of PD-L1 but low expression of PD-1.
library(FirebrowseR)
library(tidyverse)
library(plotly)
library(downloadthis)
library(DT)
#' Download paged mRNASeq expression records for a set of patients.
#'
#' Requests successive pages from `Samples.mRNASeq()` (CSV format) until a
#' page comes back with fewer than `page_size` rows, which is taken as the
#' last page.
#'
#' @param tmp.Pats Data frame with a `tcga_participant_barcode` column; only
#'   that column is read, and it is sent as the participant filter.
#' @param diff.Exp.Genes Gene symbols to query (passed as `gene`).
#' @param cohort_name Cohort identifier (passed as `cohort`).
#' @param page_size Rows requested per page. Defaults to 2000, the value
#'   that used to be hard-coded.
#' @return A list with one element per downloaded page, in page order. The
#'   pages are not combined here; callers row-bind them. The list is empty
#'   when the very first request returns `NULL`.
extract_Exp <- function(tmp.Pats, diff.Exp.Genes, cohort_name,
                        page_size = 2000) {
  pages <- list()
  page <- 1
  repeat {
    chunk <- Samples.mRNASeq(
      format = "csv",
      gene = diff.Exp.Genes,
      cohort = cohort_name,
      tcga_participant_barcode = tmp.Pats$tcga_participant_barcode,
      page_size = page_size,
      page = page
    )
    # Assigning NULL into a list slot deletes the slot, and the row-count
    # check below would then fail with "subscript out of bounds". Treat a
    # NULL response as "nothing more to fetch" instead.
    if (is.null(chunk)) {
      break
    }
    pages[[page]] <- chunk
    # A short page means there are no further rows for this query.
    if (nrow(chunk) < page_size) {
      break
    }
    page <- page + 1
  }
  pages
}
# Download the table of all available cohorts. Its first column is the
# cohort identifier that the rest of the script passes to the API.
cohorts <- Metadata.Cohorts(format = "csv")

# Official gene symbols of PD-1 and PD-L1.
immuno.Genes <- c("PDCD1", "CD274")

# Number of TCGA barcodes sent per expression query. Requesting a whole
# cohort at once makes FirebrowseR return an error, so each cohort is
# processed in "slides" of this many samples.
slide.size <- 100

# Cohorts reported as having no samples available. They are dropped by name
# rather than by row position (previously rows 10 and then 17), so a change
# in the row order of the cohort table cannot remove the wrong cohorts.
excluded.Cohorts <- c("FPPP", "LAML")
cohorts <- cohorts[!(cohorts[[1]] %in% excluded.Cohorts), , drop = FALSE]
nCohorts <- nrow(cohorts)
# For every cohort: download its clinical table (to obtain the participant
# barcodes), download PD-1/PD-L1 expression for those participants in batches
# of `slide.size`, and store the combined expression table in a variable
# named after the cohort (read back later with get()).
for (i in seq_len(nCohorts)) {
  cancer.Type <- cohorts[[1]][i]

  # --- Clinical records of this cohort, fetched page by page ---
  page.size <- 150
  page.Counter <- 1
  cancer.Pats <- list()
  repeat {
    clinical.Page <- Samples.Clinical(
      format = "csv",
      cohort = cancer.Type,
      page_size = page.size,
      page = page.Counter
    )
    # A NULL response cannot be stored in the list or measured with nrow();
    # treat it as the end of the data.
    if (is.null(clinical.Page)) {
      break
    }
    # Every page after the first takes the column names of the page before
    # it, so that all pages can be row-bound (presumably later CSV pages do
    # not carry matching headers; confirm against the API).
    if (page.Counter > 1) {
      colnames(clinical.Page) <- colnames(cancer.Pats[[page.Counter - 1]])
    }
    cancer.Pats[[page.Counter]] <- clinical.Page
    # A short page is the last page.
    if (nrow(clinical.Page) < page.size) {
      break
    }
    page.Counter <- page.Counter + 1
  }
  cancer.Pats <- do.call(rbind, cancer.Pats)
  n.Pats <- if (is.null(cancer.Pats)) 0L else nrow(cancer.Pats)

  # --- Expression data, requested in batches of `slide.size` patients ---
  # First row of each batch: 1, 1 + slide.size, ... The final batch holds
  # whatever remains when n.Pats is not a multiple of slide.size.
  batch.Starts <- if (n.Pats > 0) {
    seq(1, n.Pats, by = slide.size)
  } else {
    integer(0)
  }
  batch.Pages <- lapply(batch.Starts, function(batch.Start) {
    batch.Rows <- batch.Start:min(batch.Start + slide.size - 1, n.Pats)
    tmp.Pats <- cancer.Pats[batch.Rows, , drop = FALSE]
    extract_Exp(tmp.Pats, immuno.Genes, cancer.Type)
  })
  # Each batch yields a list of pages; flatten them into one list of pages.
  immuno.Genes.Exp <- do.call(c, batch.Pages)

  if (length(immuno.Genes.Exp) == 0) {
    # Previously an empty cohort aborted the script with an undefined
    # `immuno.Genes.Exp`; now it is recorded as NULL and reported.
    warning("No expression data retrieved for cohort ", cancer.Type,
            call. = FALSE)
    immuno.Genes.Exp <- NULL
  } else {
    immuno.Genes.Exp <- do.call(rbind, immuno.Genes.Exp)
  }
  assign(cancer.Type, immuno.Genes.Exp)
}
# Median log2 expression of PD-1 (PDCD1) and PD-L1 (CD274) per cohort,
# restricted to rows whose sample_type is "TP".
# NOTE: despite its name, `mean_expression` holds medians; the name is kept
# because later code refers to it.

# Environment in which the per-cohort expression tables were stored by name.
cohort.Env <- environment()

# Median of `expression_log2` for one gene among the "TP" rows of the table
# stored under `cohort.Name`; NA when no usable values exist.
median_tp_expression <- function(cohort.Name, gene.Symbol) {
  mRNA.exp <- get(cohort.Name, envir = cohort.Env)
  keep <- which(mRNA.exp$gene == gene.Symbol & mRNA.exp$sample_type == "TP")
  median(as.numeric(mRNA.exp$expression_log2[keep]), na.rm = TRUE)
}

# seq_len() keeps this safe when there are no cohorts (1:0 would visit 1, 0).
cohort.Names <- cohorts[[1]][seq_len(nCohorts)]

# Kept as one-column matrices, as before.
PD1_exp <- matrix(
  vapply(cohort.Names, median_tp_expression, numeric(1),
         gene.Symbol = "PDCD1", USE.NAMES = FALSE),
  ncol = 1
)
PDL1_exp <- matrix(
  vapply(cohort.Names, median_tp_expression, numeric(1),
         gene.Symbol = "CD274", USE.NAMES = FALSE),
  ncol = 1
)

# One row per cohort, sorted by the sum of the two medians (largest first).
mean_expression <- tibble(
  Cohort = cohorts[[1]],
  PD1 = PD1_exp[, 1],
  PDL1 = PDL1_exp[, 1]
) %>%
  mutate(Combined = PD1 + PDL1) %>%
  arrange(desc(Combined))
# Scatter plot of the per-cohort PD-1 (x) and PD-L1 (y) values held in
# `mean_expression`, with the cohort mapped to `tooltip`; the finished plot
# is converted with ggplotly().
p <- ggplot(mean_expression, aes(x = PD1, y = PDL1, tooltip = Cohort))
p <- p + geom_point(color = "darkblue", size = 3)
p <- p + labs(x = "PD-1 Expression", y = "PD-L1 Expression")
p <- p + theme_minimal()
ggplotly(p)
# Table of `mean_expression` with column filters at the bottom; the three
# numeric columns are displayed rounded to 3 digits.
expression.Table <- datatable(mean_expression, filter = "bottom")
for (rounded.Column in c("PD1", "PDL1", "Combined")) {
  expression.Table <- formatRound(expression.Table, rounded.Column, 3)
}
expression.Table
# "Download data" button that offers `mean_expression` as a .csv file named
# PD1_PDL1_expression_data.
downloadthis::download_this(
  mean_expression,
  output_name = "PD1_PDL1_expression_data",
  output_extension = ".csv",
  self_contained = TRUE,
  button_label = "Download data",
  button_type = "warning",
  has_icon = TRUE,
  icon = "fa fa-save"
)
This analysis was substantially influenced by the work of Wenhui Wang.